clear
clear global
currentfolder='C:\Users\tbold\Dropbox\Tessa and Tobi\JEEA Replication';    
filetosave='evidence_of_groups_vill_P2_Mar21'; % name of workspace saved
village=2;
datafile='perm_vill_2';
outputpath=[currentfolder '\Data\Output'];
programpath=[currentfolder '\Programmes'];

cd(outputpath)
eval(['load ' datafile '.out;']);
eval(['HID=' datafile '(:,1);']);
eval(['YEAR=' datafile '(:,2);']);
eval(['DCONS=' datafile '(:,3);']);
eval(['DCONSRES=' datafile '(:,4);']);
cd(programpath)
DATA=[HID YEAR DCONS DCONSRES];


CONSDATA=DATA(:,4);
years=6;
perms=10000;

households=37;

for hh1=1:households
    for hh2=1:households
        index1=(hh1-1)*years;
        index2=(hh2-1)*years;
        X1=CONSDATA(index1+1:index1+years,1);
        X2=CONSDATA(index2+1:index2+years,1);
        [a,b]=corrcoef(X1,X2);
        CONS_CORR(hh1,hh2)=a(2,1);
    end
end
CONS_CORR_RESHAPE=reshape(CONS_CORR,37*37,1);
%%%%and now compare this to the correlations you get in the model generated
%%%%data
load('permmodel2.mat')
DC_LOG_COND_ALL=DC_LOG_COND2;
%this one is done with an `aggregate' income process for the rest of the
%village.
load('permmodel2_village_RS.mat')
DC_LOG_COND_ALL(1:size(DC_LOG_COND2,1),:,179,:)=DC_LOG_COND2(:,:,:);
clear DC_LOG_COND2

tic
%%%for village 1
vss_high_inddev=37-1;                 % the number of total village members is vss_high_inddev+1 under individual deviations
vss_high_coaldev=37-1;

groupsizeset=[4 vss_high_inddev+1 180]; 
PERMS=10000;

for pp=1:1000
for Qbeta=1:2

perms=PERMS(1);
CONS_CORR_MODEL2=nan(vss_high_inddev+1,vss_high_inddev+1,length(groupsizeset),perms);
for perm=1:perms
perm, pp


for gg=1:length(groupsizeset)
groupsize=groupsizeset(gg);
vss=groupsize-1;

census=180;
if perm==1
AA=[1:census]';    
elseif perm>1 
rng(pp*1000000+perm*100000,'twister'); 
AA=randperm(census)';
end
sample=vss_high_inddev+1; %%sample 37 households from the `180'
rng(pp,'twister'); 
tstart=randi(size(DC_LOG_COND_ALL,2)-10); %%sample them for six consecutive periods starting in tstart.

        X1=DC_LOG_COND_ALL(AA(1:sample),tstart:tstart+6,vss,Qbeta)';
        X2=DC_LOG_COND_ALL(AA(1:sample),tstart:tstart+6,vss,Qbeta)';
        CONS_CORR_MODEL2(1:sample,1:sample,gg,perm)=corr(X1,X2);
        SAMPLE(:,gg,perm)=AA(1:sample);
    
    Nvill=ceil((vss_high_inddev+1)/(groupsize))*6; %%%%note that this is for village 2

end
end

toc

%%%and the KS-test%%%
%Data
CONS_CORR_VECTOR=reshape(CONS_CORR,1,households^2);
CONS_CORR_VECTOR_SORT=sort(CONS_CORR_VECTOR);
%Model
for gg=1:length(groupsizeset)
    for perm=1:perms
CONS_CORR_MODEL2_VECTOR=reshape(CONS_CORR_MODEL2(1:households,1:households,gg,perm),1,households^2);
CONS_CORR_MODEL2_VECTOR_SORT=sort(CONS_CORR_MODEL2_VECTOR);

[h,p,ks2stat] = kstest2(CONS_CORR_VECTOR_SORT,CONS_CORR_MODEL2_VECTOR_SORT);
goodnessoffit(gg,perm,Qbeta,pp)=ks2stat;
preject(gg,perm,Qbeta,pp)=p;
average_corr(gg,perm,Qbeta,pp)=mean(CONS_CORR_MODEL2_VECTOR_SORT);
variance_corr(gg,perm,Qbeta,pp)=mean(CONS_CORR_MODEL2_VECTOR_SORT);

%and these help to pick the permutation with the best fit
DEV(gg,perm)=sum(sum((CONS_CORR_MODEL2(1:households,1:households,gg,perm)-CONS_CORR(:,:)).^2))/(households^2);
DEV2(gg,perm)=[CONS_CORR_VECTOR_SORT-CONS_CORR_MODEL2_VECTOR_SORT]*[CONS_CORR_VECTOR_SORT-CONS_CORR_MODEL2_VECTOR_SORT]'/(households^2);

    
end
    
[CC,II]=min(DEV(gg,:));
MINCRITERIONWHO(gg,Qbeta,pp)=DEV(gg,II);
SAMPLECENSUSWHO(:,:,gg,Qbeta,pp)=SAMPLE(:,gg,II);
CORR_MIN(:,:,gg,Qbeta,pp)=CONS_CORR_MODEL2(:,:,gg,II);

[CC,II]=min(DEV2(gg,:));
MINCRITERIONWHO2(gg,Qbeta,pp)=DEV2(gg,II);
SAMPLECENSUSWHO2(:,:,gg,Qbeta,pp)=SAMPLE(:,gg,II);
CORR_MIN2(:,:,gg,Qbeta,pp)=CONS_CORR_MODEL2(:,:,gg,II);



end
end
if  pp==10  | pp==20  | pp==30 | pp==40 | pp==50 | pp==60 | pp==70 | pp==80 | pp==90 | pp==100
cd(outputpath)
eval(['save ', filetosave, int2str(village) '.mat'])
cd(programpath)
end
end

clear CONS_CORR_MODEL2
cd(outputpath)
eval(['save ', filetosave, int2str(village) '.mat'])
cd(programpath)

%%%and find the entries for Table 6, n=4, delta==.94
for pp=1:1000
[CC,II]=min(goodnessoffit(1,:,2,pp));
minindex(pp)=II;
minvalue(pp)=CC;
end
%best fit for each pp
for pp=1:1000
goodnessoffit_min(pp)=goodnessoffit(1,minindex(pp),2,pp);  
preject_min(pp)=preject(1,minindex(pp),2,pp);
end
%Table 6, entry for n=4, delta=0.94
%average of best fit over all pp
mean(goodnessoffit_min)
mean(preject_min)
%best fit over best fit over all pp
[CC,II]=min(goodnessoffit_min);
goodnessoffit_min(II)
preject_min(II)

%%%and find the entries for the table, Table 6, entry for n=37, delta=0.9
for pp=1:1000
[CC,II]=min(goodnessoffit(2,:,1,pp));
minindex(pp)=II;
minvalue(pp)=CC;
end
for pp=1:1000
goodnessoffit_min(pp)=goodnessoffit(2,minindex(pp),1,pp);  
preject_min(pp)=preject(2,minindex(pp),1,pp);
end
%average of best fit over all pp
mean(goodnessoffit_min)
mean(preject_min)
%best fit over best fit over all pp
[CC,II]=min(goodnessoffit_min);
CC
goodnessoffit_min(II)
preject_min(II)

%%entries for the Table n=180, delta=0.9
for pp=1:1000
[CC,II]=min(goodnessoffit(3,:,1,pp));
minindex(pp)=II;
minvalue(pp)=CC;
end
for pp=1:1000
goodnessoffit_min(pp)=goodnessoffit(3,minindex(pp),1,pp);  
preject_min(pp)=preject(3,minindex(pp),1,pp);
end
%average of best fit over all pp
mean(goodnessoffit_min)
mean(preject_min)
%best fit over best fit over all pp
[CC,II]=min(goodnessoffit_min);
goodnessoffit_min(II)
preject_min(II)



